#set working directory

setwd("C:/Users/Aashi/Desktop/DGE")

##loading library

# load the libraries    
library(DESeq2)
library(apeglm)
library(ggplot2)
library(pheatmap)
library(cowplot)
library(hrbrthemes)
library(viridis)
library(plotly)
library(dplyr)
library(tidyverse)
library(org.Hs.eg.db)
library(fgsea)
library("msigdbr")

###DGE

# DGE
##loading count matrix
gse = read.csv("GSE92945_Fibroblast_RNAseq_counts_1.csv", header = TRUE, row.names = 1)
info = read.table("ind.txt", header = T,sep ='\t') 

##creating a data matrix  
dds = DESeqDataSetFromMatrix(round(gse), info, ~conditions)

##removing lowly expressed genes
keep = rowSums(counts(dds)) >= 10
dds = dds[keep,]

##main deseq
ddsDE = DESeq(dds)

##export normalized read counts
normCOunts = counts(ddsDE, normalized = T)
write.csv(normCOunts,"normalised_data.csv")

##DESEQ results
res = results(ddsDE, alpha = 0.05)

##output DESeq result
resOrdered = res[order(res$padj),]
write.csv(resOrdered, "resOrdered1.csv")

#plotting

# loading and preparing data for plotting 
##pca and disp(plots)
vsat = vst(ddsDE, blind = FALSE)

pca = plotPCA(vsat, intgroup = "conditions")
disp = plotDispEsts(ddsDE)

ggplotly(pca)
##load_the_deseq2_results_data_and_metadata  

normCOunt = read.csv('normalised_data.csv', row.names = 1)

deSeqRes = read.csv('resOrdered.csv',row.names = 1)

deSeqRes$sig =ifelse(deSeqRes$padj <= 0.05, 'yes', 'no')

deSeqRes = na.omit(deSeqRes)

##ggplots

# ggplots
##ma_plot 
maplot = ggplot(deSeqRes, aes(x = log10(baseMean), y = log2FoldChange, color = sig)) + geom_point(alpha = 0.7)+scale_size(range = c(1.4, 19), name = "MA PLOT")+ scale_color_viridis(discrete = TRUE, guide=FALSE)+theme_ipsum()+theme(legend.position = "none")
  
 

##volcano plot 
volcano = ggplot(deSeqRes, aes(x = log2FoldChange, y = -log10(padj), color = sig)) + geom_point(alpha = 0.7)+scale_size(range = c(1.9, 20), name = "MA PLOT")+ scale_color_viridis(discrete = TRUE, guide=FALSE)+theme_ipsum()+theme(legend.position = "none")
volcano = volcano + ylim(0,15)



##pheatmap
signi = subset(deSeqRes, padj <= 0.05)
allsig= merge(normCOunt, signi, by = 0)

sigcounts = allsig[,2:11]
row.names(sigcounts) = allsig$Row.names 
pheat = pheatmap(log2(sigcounts + 1), scale = 'row', show_rownames = F, treeheight_row = 0, treeheight_col = 0)

###cowplot
plot_grid(volcano,pheat[[4]])

plot_grid(maplot,volcano)

###interactive plot using ggplotly 
ggplotly(maplot, tooltip = "text")
ggplotly(volcano)

#GSEA


```r
# GSEA
 
rest <- read_csv("resOrdered1.csv")
head(rest)
## # A tibble: 6 x 7
##   ...1            baseMean log2FoldChange lfcSE  stat   pvalue     padj
##   <chr>              <dbl>          <dbl> <dbl> <dbl>    <dbl>    <dbl>
## 1 ENSG00000189223     756.           4.24 0.533  7.96 1.77e-15 1.64e-11
## 2 ENSG00000198848    1017.         -10.1  1.26  -7.98 1.41e-15 1.64e-11
## 3 ENSG00000180155    1057.          -3.08 0.398 -7.75 9.11e-15 3.55e-11
## 4 ENSG00000125730    8568.          -8.43 1.09  -7.74 9.60e-15 3.55e-11
## 5 ENSG00000243137    6824.          -8.62 1.11  -7.76 8.56e-15 3.55e-11
## 6 ENSG00000133048   10922.         -11.8  1.55  -7.64 2.23e-14 6.89e-11
rest = rest %>%
  rename(gene = ...1 )
##creating a mapping table
library(org.Hs.eg.db)
ens2symbol = AnnotationDbi::select(org.Hs.eg.db,keys = rest$gene, 
                                   columns="SYMBOL",
                                   keytype="ENSEMBL")
ens2symbol <- as_tibble(ens2symbol)
view(ens2symbol)

rest <- inner_join(rest, ens2symbol, by=c("gene"="ENSEMBL"))
rest
## # A tibble: 24,124 x 8
##    gene            baseMean log2FoldChange lfcSE  stat   pvalue     padj SYMBOL 
##    <chr>              <dbl>          <dbl> <dbl> <dbl>    <dbl>    <dbl> <chr>  
##  1 ENSG00000189223     756.           4.24 0.533  7.96 1.77e-15 1.64e-11 PAX8-A~
##  2 ENSG00000198848    1017.         -10.1  1.26  -7.98 1.41e-15 1.64e-11 CES1   
##  3 ENSG00000180155    1057.          -3.08 0.398 -7.75 9.11e-15 3.55e-11 LYNX1  
##  4 ENSG00000125730    8568.          -8.43 1.09  -7.74 9.60e-15 3.55e-11 C3     
##  5 ENSG00000243137    6824.          -8.62 1.11  -7.76 8.56e-15 3.55e-11 PSG4   
##  6 ENSG00000133048   10922.         -11.8  1.55  -7.64 2.23e-14 6.89e-11 CHI3L1 
##  7 ENSG00000106018     802.          -9.12 1.20  -7.58 3.43e-14 9.06e-11 VIPR2  
##  8 ENSG00000114805     337.          -8.70 1.17  -7.43 1.08e-13 2.50e-10 PLCH1  
##  9 ENSG00000162654    1180.          -7.96 1.10  -7.21 5.43e-13 1.12e- 9 GBP4   
## 10 ENSG00000168675     639.          -9.73 1.36  -7.15 8.91e-13 1.65e- 9 LDLRAD4
## # ... with 24,114 more rows
res2 <- rest %>% 
  dplyr::select(SYMBOL, stat) %>% 
  na.omit() %>% 
  distinct() %>% 
  group_by(SYMBOL) %>% 
  summarize(stat=mean(stat))
res2
## # A tibble: 18,302 x 2
##    SYMBOL      stat
##    <chr>      <dbl>
##  1 A1BG      1.85  
##  2 A1BG-AS1 -1.93  
##  3 A2M      -3.50  
##  4 A2M-AS1  -2.30  
##  5 A3GALT2  -1.43  
##  6 A4GALT   -2.17  
##  7 AAAS      0.0828
##  8 AACS     -0.877 
##  9 AADAC     0.838 
## 10 AADACL4  -0.769 
## # ... with 18,292 more rows
ranks <- deframe(res2)
head(ranks, 20)
##        A1BG    A1BG-AS1         A2M     A2M-AS1     A3GALT2      A4GALT 
##  1.84980065 -1.93257957 -3.49846494 -2.30188355 -1.42653887 -2.16739135 
##        AAAS        AACS       AADAC     AADACL4     AADACP1       AADAT 
##  0.08283153 -0.87690060  0.83765433 -0.76913555 -0.21984127  0.26320865 
##       AAGAB        AAK1       AAMDC        AAMP       AANAT        AAR2 
##  0.84319947  1.06627871  1.05496098 -0.01067306  1.34247123  0.72511821 
##        AARD       AARS1 
## -2.84723813 -0.30592030
####loading reference enrichment database 

Hs.GOBP <- msigdbr(species = "Homo sapiens", category = "C5", subcategory = "BP")
Hs.Reactome <- msigdbr(species = "Homo sapiens", category = "C2", subcategory = "CP:REACTOME")
Hs.Hallmark <- msigdbr(species = "Homo sapiens", category = "H")

Hs.GOBP.Entrez <- Hs.GOBP %>% split(x = .$entrez_gene, f = .$gs_name)
Hs.Hallmark.Entrez <- Hs.Hallmark %>% split(x = .$entrez_gene, f = .$gs_name)
Hs.GOBP.Symbol <- Hs.GOBP %>% split(x = .$gene_symbol, f = .$gs_name)
Hs.Hallmark.Symbol <- Hs.Hallmark %>% split(x = .$gene_symbol, f = .$gs_name)



#####Now that we have everything lets run fgsea on it  
fgseaRes.Hallmark <- fgsea::fgseaMultilevel(pathways=Hs.Hallmark.Symbol,stats = ranks, eps =0)

GSEA but prettier

# Make the Results tidy
fgseaResTidy <- fgseaRes.Hallmark %>%
  as_tibble() %>%
  arrange(desc(NES))

fgseaResTidy %>% 
  dplyr::select(-leadingEdge, -ES) %>% 
  arrange(padj) %>% 
  DT::datatable()
head(fgseaResTidy,10)
## # A tibble: 10 x 8
##    pathway                       pval     padj log2err    ES   NES  size leadi~1
##    <chr>                        <dbl>    <dbl>   <dbl> <dbl> <dbl> <int> <list> 
##  1 HALLMARK_E2F_TARGETS      3.62e-55 1.81e-53   1.94  0.737  3.60   200 <chr>  
##  2 HALLMARK_MYC_TARGETS_V1   2.03e-47 5.07e-46   1.80  0.712  3.48   199 <chr>  
##  3 HALLMARK_G2M_CHECKPOINT   1.36e-36 2.27e-35   1.58  0.659  3.22   199 <chr>  
##  4 HALLMARK_MYC_TARGETS_V2   5.52e-14 3.45e-13   0.965 0.704  2.78    58 <chr>  
##  5 HALLMARK_OXIDATIVE_PHOSP~ 7.08e-17 5.90e-16   1.06  0.513  2.51   200 <chr>  
##  6 HALLMARK_MTORC1_SIGNALING 4.14e-15 2.96e-14   0.997 0.498  2.43   199 <chr>  
##  7 HALLMARK_UNFOLDED_PROTEI~ 4.25e-11 2.12e-10   0.851 0.537  2.41   112 <chr>  
##  8 HALLMARK_DNA_REPAIR       1.66e-11 9.23e-11   0.863 0.498  2.34   149 <chr>  
##  9 HALLMARK_GLYCOLYSIS       1.87e- 8 8.49e- 8   0.734 0.421  2.06   191 <chr>  
## 10 HALLMARK_MITOTIC_SPINDLE  2.59e- 7 9.97e- 7   0.675 0.392  1.92   197 <chr>  
## # ... with abbreviated variable name 1: leadingEdge
##plotting_the_results 
gea_plot =  ggplot(fgseaResTidy, aes(reorder(pathway, NES), NES)) +
  geom_col(aes(fill=padj<0.05)) +
  coord_flip() +
  labs(x="Pathway", y="Normalized Enrichment Score",
       title="Hallmark pathways NES from GSEA") + 
  theme_minimal()
ggplotly(gea_plot)  

#playing around with different pathways

# trying_out_in_different_pathways
##kegg
kegg = fgsea(pathways=gmtPathways("c2.cp.kegg.v6.2.symbols.gmt"), ranks) %>% 
  as_tibble() %>% 
  arrange(padj)%>%
  DT::datatable()

##go
go = fgsea(pathways=gmtPathways("c5.all.v6.2.symbols.gmt"), ranks) %>%
  as_tibble()%>%
  arrange(padj)%>%
  DT::datatable()

##miR

miR = fgsea(pathways=gmtPathways("c3.mir.v6.2.symbols.gmt"), ranks) %>%
  as_tibble()%>%
  arrange(padj)%>%
  DT::datatable()

sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 7 x64 (build 7601) Service Pack 1
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] msigdbr_7.5.1               fgsea_1.20.0               
##  [3] org.Hs.eg.db_3.14.0         AnnotationDbi_1.56.2       
##  [5] forcats_0.5.2               stringr_1.5.0              
##  [7] purrr_0.3.5                 readr_2.1.3                
##  [9] tidyr_1.2.1                 tibble_3.1.8               
## [11] tidyverse_1.3.2             dplyr_1.0.10               
## [13] plotly_4.10.1               viridis_0.6.2              
## [15] viridisLite_0.4.1           hrbrthemes_0.8.0           
## [17] cowplot_1.1.1               pheatmap_1.0.12            
## [19] ggplot2_3.4.0               apeglm_1.16.0              
## [21] DESeq2_1.34.0               SummarizedExperiment_1.24.0
## [23] Biobase_2.54.0              MatrixGenerics_1.6.0       
## [25] matrixStats_0.63.0          GenomicRanges_1.46.1       
## [27] GenomeInfoDb_1.30.1         IRanges_2.28.0             
## [29] S4Vectors_0.32.4            BiocGenerics_0.40.0        
## 
## loaded via a namespace (and not attached):
##   [1] snow_0.4-4             readxl_1.4.1           backports_1.4.1       
##   [4] fastmatch_1.1-3        systemfonts_1.0.4      plyr_1.8.8            
##   [7] lazyeval_0.2.2         splines_4.1.1          crosstalk_1.2.0       
##  [10] BiocParallel_1.28.3    digest_0.6.31          htmltools_0.5.4       
##  [13] fansi_1.0.3            magrittr_2.0.3         memoise_2.0.1         
##  [16] googlesheets4_1.0.1    tzdb_0.3.0             Biostrings_2.62.0     
##  [19] annotate_1.72.0        modelr_0.1.10          extrafont_0.18        
##  [22] vroom_1.6.0            extrafontdb_1.0        bdsmatrix_1.3-6       
##  [25] timechange_0.1.1       colorspace_2.0-3       blob_1.2.3            
##  [28] rvest_1.0.3            haven_2.5.1            xfun_0.35             
##  [31] crayon_1.5.2           RCurl_1.98-1.9         jsonlite_1.8.4        
##  [34] genefilter_1.76.0      survival_3.2-11        glue_1.6.2            
##  [37] gtable_0.3.1           gargle_1.2.1           zlibbioc_1.40.0       
##  [40] XVector_0.34.0         DelayedArray_0.20.0    Rttf2pt1_1.3.11       
##  [43] scales_1.2.1           mvtnorm_1.1-3          DBI_1.1.3             
##  [46] Rcpp_1.0.9             xtable_1.8-4           emdbook_1.3.12        
##  [49] bit_4.0.5              DT_0.26                htmlwidgets_1.6.0     
##  [52] httr_1.4.4             RColorBrewer_1.1-3     ellipsis_0.3.2        
##  [55] pkgconfig_2.0.3        XML_3.99-0.13          farver_2.1.1          
##  [58] sass_0.4.4             dbplyr_2.2.1           locfit_1.5-9.6        
##  [61] utf8_1.2.2             tidyselect_1.2.0       labeling_0.4.2        
##  [64] rlang_1.0.6            munsell_0.5.0          cellranger_1.1.0      
##  [67] tools_4.1.1            cachem_1.0.6           cli_3.4.1             
##  [70] generics_0.1.3         RSQLite_2.2.18         broom_1.0.2           
##  [73] evaluate_0.19          fastmap_1.1.0          yaml_2.3.6            
##  [76] babelgene_22.9         knitr_1.41             bit64_4.0.5           
##  [79] fs_1.5.2               KEGGREST_1.34.0        xml2_1.3.3            
##  [82] compiler_4.1.1         rstudioapi_0.14        png_0.1-8             
##  [85] reprex_2.0.2           geneplotter_1.72.0     bslib_0.4.2           
##  [88] stringi_1.7.6          highr_0.9              gdtools_0.2.4         
##  [91] lattice_0.20-44        Matrix_1.5-3           vctrs_0.5.1           
##  [94] pillar_1.8.1           lifecycle_1.0.3        jquerylib_0.1.4       
##  [97] data.table_1.14.6      bitops_1.0-7           R6_2.5.1              
## [100] gridExtra_2.3          MASS_7.3-54            assertthat_0.2.1      
## [103] withr_2.5.0            GenomeInfoDbData_1.2.7 parallel_4.1.1        
## [106] hms_1.1.2              grid_4.1.1             coda_0.19-4           
## [109] rmarkdown_2.19         googledrive_2.0.0      bbmle_1.0.25          
## [112] numDeriv_2016.8-1.1    lubridate_1.9.0